#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# vim: tabstop=4 shiftwidth=4 softtabstop=4
#
# Copyright (C) 2017-2019 GEM Foundation
#
# OpenQuake is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# OpenQuake is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with OpenQuake.  If not, see <http://www.gnu.org/licenses/>.
import sys
import csv
from openquake.baselib import sap
from openquake.hazardlib import valid


def extract_sites(file_csv, sites_csv):
    """
    Produce a good `sites_csv` file from a generic CSV with fields longitude
    and latitude, by normalizing the coordinates and discarding duplicate
    sites.

    The header is searched case-insensitively (ignoring surrounding blanks)
    for a `lon`/`longitude` column and a `lat`/`latitude` column; if several
    columns match, the last one wins. Each non-blank data row is converted
    with `valid.lon_lat` (according to the historical description of this
    script that reduces the coordinates to 5 digits -- confirm against
    hazardlib) and the distinct points are written in sorted order, one
    `lon,lat` pair per line, without a header. A summary is printed on
    stdout, including the number of duplicates if there are any.

    :param file_csv: path of the input CSV file, starting with a header line
    :param sites_csv: path of the output file, overwritten if it exists
    :raises SystemExit: if the input file is empty or its header lacks a
        longitude or a latitude column
    """
    lon_idx = lat_idx = None  # column positions, None until found
    nrows = 0  # number of data rows actually converted
    points = set()
    # newline='' is what the csv module asks for, so that newlines embedded
    # in quoted fields are parsed correctly
    with open(file_csv, newline='') as f:
        reader = csv.reader(f)
        header = next(reader, None)
        if header is None:
            sys.exit('No header in %s: the file is empty!' % file_csv)
        for i, field in enumerate(header):
            col = field.strip().lower()
            if col in ('lon', 'longitude'):
                lon_idx = i
            if col in ('lat', 'latitude'):
                lat_idx = i
        if lon_idx is None:
            sys.exit('No longitude field in the header of %s!' % file_csv)
        if lat_idx is None:
            sys.exit('No latitude field in the header of %s!' % file_csv)
        for row in reader:
            if not row:  # csv.reader yields [] for blank lines
                continue
            point = valid.lon_lat('%s %s' % (row[lon_idx], row[lat_idx]))
            points.add(point)
            nrows += 1
    with open(sites_csv, 'w') as f:
        for point in sorted(points):
            f.write('%s,%s\n' % point)
    print('Written %d sites on %s' % (len(points), sites_csv))
    if len(points) < nrows:
        print('Found %d duplicated sites!' % (nrows - len(points)))


# Help strings attached to the function as attributes named after its
# parameters; presumably picked up by `sap.run` to build the command-line
# help -- confirm against openquake.baselib.sap
extract_sites.file_csv = 'CSV file with lon and lat in the header'
extract_sites.sites_csv = 'name of the normalized sites.csv file'

# When executed as a script, hand the function over to the sap runner
if __name__ == '__main__':
    sap.run(extract_sites)
